# View assignment here
from __future__ import division
import numpy as np
import pdb
# Toy data set: 9 users x 7 movies.
# Column order of the preference matrix follows `movie_titles`.
movie_titles = [
    'Legally Blond',
    'Matrix',
    'Bourne Identity',
    'You’ve Got Mail',
    'The Devil Wears Prada',
    'The Dark Knight',
    'The Lord of the Rings',
]

# Preference matrix: +1 = liked, -1 = disliked, 0 = no rating.
# One row per user (User 1 .. User 9, top to bottom).
P = np.array([
    [ 0,  0, -1,  0, -1,  1,  1],
    [-1,  1,  1, -1,  0,  1,  1],
    [ 0,  1,  1,  0,  0, -1,  1],
    [-1,  1,  1,  0,  0,  1,  1],
    [ 0,  1,  1,  0,  0,  1,  1],
    [ 1, -1,  1,  1,  1, -1,  0],
    [-1,  1, -1,  0, -1,  0,  1],
    [ 0, -1,  0,  1,  1, -1, -1],
    [ 0,  0, -1,  1,  1,  0, -1],
])

# Confidence mask: 1 where a rating exists, 0 exactly where P[i, j] == 0.
C = np.absolute(P)

print('Raw Preference Matrix:')
print(P)
# Output:
# Raw Preference Matrix:
# [[ 0  0 -1  0 -1  1  1]
#  [-1  1  1 -1  0  1  1]
#  [ 0  1  1  0  0 -1  1]
#  [-1  1  1  0  0  1  1]
#  [ 0  1  1  0  0  1  1]
#  [ 1 -1  1  1  1 -1  0]
#  [-1  1 -1  0 -1  0  1]
#  [ 0 -1  0  1  1 -1 -1]
#  [ 0  0 -1  1  1  0 -1]]
# Hyper-parameters
reg = 0.1  # ridge (L2) regularization strength
f = 2      # number of latent factors
m, n = P.shape  # rows x columns of the preference matrix

# Random initialization of the two factor matrices:
#   X has shape (m, f), Y has shape (f, n).
# rand() draws from [0, 1), so 1 - 2*rand() lies in (-1, 1]; the 0.1 factor
# shrinks the starting values to (-0.1, 0.1].
X = 0.1 * (1 - 2 * np.random.rand(m, f))
Y = 0.1 * (1 - 2 * np.random.rand(f, n))
# Alternating Ridge Regression
# Factorizes P ~= X.dot(Y) by alternately solving two ridge-regularized
# least-squares problems in closed form. Every entry of P, including the
# zeros, is fitted here because no weighting by C is applied in the solves.
# `range` is used instead of `xrange` so the loop runs on Python 2 and 3.
ridge_term = reg * np.eye(f)  # loop-invariant (f x f) regularizer
for _ in range(100):
    # Least-squares keeping Y fixed:
    #   (Y Y^T + reg*I) X^T = Y P^T
    X = np.linalg.solve(
        np.dot(Y, Y.T) + ridge_term,
        np.dot(Y, P.T)
    ).T
    # Least-squares keeping X fixed:
    #   (X^T X + reg*I) Y = X^T P
    Y = np.linalg.solve(
        np.dot(X.T, X) + ridge_term,
        np.dot(X.T, P)
    )
print('Alternating Ridge Regression:')
print(np.dot(X,Y))
# The reported error is restricted to rated entries by multiplying with C.
print('Error for movies that users actually rated: %.2f'%np.sum((C*(P - np.dot(X,Y)))**2))
# Sample output (X and Y are randomly initialized, so values differ between runs):
# Alternating Ridge Regression:
# [[-0.47047868 0.42881288 -0.92322674 -0.40431625 -0.82818449 0.71377038 0.35042258]
#  [-0.57771631 1.11906024 0.94589649 -0.6138595 -0.41293394 0.72761475 1.24992152]
#  [-0.08329304 0.47515025 1.23776778 -0.15067507 0.26037126 0.02607104 0.61430612]
#  [-0.4797323 0.97615665 0.95005888 -0.51903623 -0.29509116 0.59242618 1.10279904]
#  [-0.37593946 0.83720164 0.99806064 -0.42105221 -0.1576005 0.44610297 0.96413601]
#  [ 0.67254752 -0.68837628 1.05514875 0.59290466 1.10703169 -1.00139291 -0.60521603]
#  [-0.56690331 0.63195467 -0.7079191 -0.51001528 -0.88042456 0.83110331 0.58167835]
#  [ 0.65941592 -0.96038474 0.03268862 0.63788069 0.79442036 -0.91013103 -0.98826639]
#  [ 0.32613592 -0.67760237 -0.69495382 0.35562611 0.186357 -0.39923578 -0.76905741]]
# Error for movies that users actually rated: 5.73
# Re-initialize both factor matrices with fresh small random values in
# (-0.1, 0.1]: X is (m x f), Y is (f x n).
X = 0.1 * (1 - 2 * np.random.rand(m, f))
Y = 0.1 * (1 - 2 * np.random.rand(f, n))
# Alternating Weighted Ridge Regression
# Same alternating scheme as the unweighted version, but every squared error
# is weighted by C, so entries with C == 0 (no rating) do not influence the
# fit. Because the weights differ per user and per movie, each row of X and
# each column of Y needs its own small (f x f) solve.
# `range` is used instead of `xrange` so the loop runs on Python 2 and 3.
ridge_term = reg * np.eye(f)  # loop-invariant (f x f) regularizer
for _ in range(100):
    # Each user u has a different set of weights Cu (one weight per movie).
    for u, Cu in enumerate(C):
        # Y * Cu scales column k of Y by Cu[k]; this equals Y.dot(diag(Cu))
        # without materializing the (n x n) diagonal matrix.
        Y_weighted = Y * Cu
        X[u] = np.linalg.solve(
            np.dot(Y_weighted, Y.T) + ridge_term,
            np.dot(Y_weighted, P[u])
        )
    # Each movie i has a different set of weights Ci (one weight per user).
    for i, Ci in enumerate(C.T):
        # X.T * Ci scales column k of X.T by Ci[k], i.e. X.T.dot(diag(Ci)).
        Xt_weighted = X.T * Ci
        Y[:, i] = np.linalg.solve(
            np.dot(Xt_weighted, X) + ridge_term,
            np.dot(Xt_weighted, P[:, i])
        )
print('Alternating Weighted Ridge Regression:')
print(np.dot(X,Y))
# The reported error is restricted to rated entries by multiplying with C.
print('Error for movies that users actually rated: %.2f'%np.sum((C*(P - np.dot(X,Y)))**2))
# Sample output (X and Y are randomly initialized, so values differ between runs):
# Alternating Weighted Ridge Regression:
# [[-0.8785934 0.75591055 -0.84723564 -0.90527554 -0.91301451 1.22456781 0.7703408 ]
#  [-0.96653212 1.12037936 0.83860294 -1.00626954 -1.00818432 0.72382856 1.10750498]
#  [-0.32540594 0.55731684 1.38658549 -0.34526083 -0.34179019 -0.14502803 0.53505323]
#  [-0.96804918 1.12194066 0.83870998 -1.00784188 -1.00976418 0.72539036 1.10906571]
#  [-0.95829148 1.11193016 0.83821615 -0.99772977 -0.99960303 0.71527644 1.09905574]
#  [ 0.92243741 -0.80265369 0.83420677 0.95077543 0.95869447 -1.26620706 -0.81690604]
#  [-1.02704309 0.8941689 -0.92578349 -1.05861242 -1.06741822 1.40873236 0.90998839]
#  [ 0.95137278 -0.98077431 -0.07728943 0.98609905 0.99077203 -0.97584608 -0.98024936]
#  [ 0.89692922 -1.06910816 -0.9585243 0.93486255 0.93596746 -0.60822952 -1.05423328]]
# Error for movies that users actually rated: 2.01
# Recommendation report: for every user, list the liked and disliked movies
# and the not-yet-rated movie with the highest predicted preference.
not_C = abs(1 - C) # movies not rated
P_hat = np.dot(X, Y)  # predicted preference for every (user, movie) pair
# Exclude already-rated movies from the arg-max by masking them with -inf.
# (Subtracting a fixed offset such as 4*C only works while the predictions
# span a range smaller than that offset.)
# NOTE: for a user with no unrated movie every score is -inf and argmax
# falls back to index 0.
candidate_scores = np.where(C == 0, P_hat, -np.inf)
top_movie_id = np.argmax(candidate_scores, axis=1)
for u, tm_id in enumerate(top_movie_id):
    liked = [movie_titles[i] for i, p in enumerate(P[u]) if p == 1]
    disliked = [movie_titles[i] for i, p in enumerate(P[u]) if p == -1]
    print('User %d liked %s'%(u+1, ', '.join(liked)))
    print('User %d disliked %s'%(u+1, ', '.join(disliked)))
    # User and movie numbers are reported 1-based.
    print('For user %d the top movie is movie n.%d (%s) - predicted vote %.2f\n'%
          (u+1, tm_id+1, movie_titles[tm_id], P_hat[u,tm_id]))
# Sample output (predictions come from a randomly initialized fit, so values differ between runs):
# User 1 liked The Dark Knight, The Lord of the Rings
# User 1 disliked Bourne Identity, The Devil Wears Prada
# For user 1 the top movie is movie n.2 (Matrix) - predicted vote 0.76
#
# User 2 liked Matrix, Bourne Identity, The Dark Knight, The Lord of the Rings
# User 2 disliked Legally Blond, You’ve Got Mail
# For user 2 the top movie is movie n.5 (The Devil Wears Prada) - predicted vote -1.01
#
# User 3 liked Matrix, Bourne Identity, The Lord of the Rings
# User 3 disliked The Dark Knight
# For user 3 the top movie is movie n.1 (Legally Blond) - predicted vote -0.33
#
# User 4 liked Matrix, Bourne Identity, The Dark Knight, The Lord of the Rings
# User 4 disliked Legally Blond
# For user 4 the top movie is movie n.4 (You’ve Got Mail) - predicted vote -1.01
#
# User 5 liked Matrix, Bourne Identity, The Dark Knight, The Lord of the Rings
# User 5 disliked
# For user 5 the top movie is movie n.1 (Legally Blond) - predicted vote -0.96
#
# User 6 liked Legally Blond, Bourne Identity, You’ve Got Mail, The Devil Wears Prada
# User 6 disliked Matrix, The Dark Knight
# For user 6 the top movie is movie n.7 (The Lord of the Rings) - predicted vote -0.82
#
# User 7 liked Matrix, The Lord of the Rings
# User 7 disliked Legally Blond, Bourne Identity, The Devil Wears Prada
# For user 7 the top movie is movie n.6 (The Dark Knight) - predicted vote 1.41
#
# User 8 liked You’ve Got Mail, The Devil Wears Prada
# User 8 disliked Matrix, The Dark Knight, The Lord of the Rings
# For user 8 the top movie is movie n.1 (Legally Blond) - predicted vote 0.95
#
# User 9 liked You’ve Got Mail, The Devil Wears Prada
# User 9 disliked Bourne Identity, The Lord of the Rings
# For user 9 the top movie is movie n.1 (Legally Blond) - predicted vote 0.90